1 Multiblocks data analysis

1.1 The RGCCA and SGCCA approaches

(Tenenhaus et al. 2014)(https://doi.org/10.1093/biostatistics/kxu001)

(Tenenhaus and Tenenhaus 2011)(https://doi.org/10.1007/s11336-011-9206-8)

1.2 The mixOmics software

(Rohart et al. 2017)(https://doi.org/10.1371/journal.pcbi.1005752)

1.3 Application to the ProMetIS dataset

selvar.i <- 20 # number of features to select

latmx2.mset <- phenomis::reading(ProMetIS::statistics_singleomics_dir.c(),
                                 report.c = "none")

sgccda.ls <- list()

for (gene.c in ProMetIS::genes.vc()) {
  
  # gene.c <- "LAT"
  wtgene.vc <- c("WT", gene.c)
  
  for (tissue.c in ProMetIS::tissues.vc()) {
    
    # tissue.c <- "plasma"
    
    mset <- ProMetIS::subsetting(latmx2.mset,
                                 genes.vc = wtgene.vc,
                                 tissues.vc = tissue.c,
                                 common_samples.l = TRUE)
    
    # mset <- ProMetIA::abbrev_mset(mset)
    
    preclinical.eset <- mset[["preclinical"]]
    
    gene.fc <- factor(Biobase::pData(preclinical.eset)[, "gene"],
                      levels = wtgene.vc)
    sex.fc <- factor(Biobase::pData(preclinical.eset)[, "sex"],
                      levels = ProMetIS::sex.vc())
    
    mset <- mset[, setdiff(names(mset), "preclinical")]
    
    exprs.ls <- MultiDataSet::as.list(mset)
    
    ## Correcting the 'sex' effect
    
    exprs.ls <- lapply(exprs.ls,
                       function(exprs.mn)
                         t(sva::ComBat(exprs.mn, sex.fc)))
    
    ## Design matrix
    
    # We create the design matrix. The weights were set to 0.1 between the blocks of omics and to 1 between the blocks of omics and the response block.
    
    design.mn <- matrix(0.1,
                        ncol = length(exprs.ls),
                        nrow = length(exprs.ls), 
                        dimnames = list(names(exprs.ls),
                                        names(exprs.ls)))

    diag(design.mn) <- 0
    
    keep_x.ls <- lapply(names(exprs.ls),
                        function(set.c) rep(selvar.i, 2))
    names(keep_x.ls) <- names(exprs.ls)
    
    block_splsda <- mixOmics::block.splsda(X = exprs.ls,
                                           Y = gene.fc, 
                                           ncomp = 2,
                                           keepX = keep_x.ls,
                                           design = design.mn)
    
    gene_tissue.c <- paste0(gene.c, "_", tissue.c)
    
    sgccda.ls[[gene_tissue.c]] <- list(gene.fc = gene.fc,
                                       sex.fc = sex.fc,
                                       exprs.ls = exprs.ls,
                                       block_splsda = block_splsda)
    
  }
}
## Found 2 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 3 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 4 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 1 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 2 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 3 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found 4 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.

1.3.1 Scores

plot_diablo <- lapply(sgccda.ls,
                      function(data_model.ls)
                        mixOmics::plotDiablo(data_model.ls[["block_splsda"]]))

1.3.2 Loadings

plot_loadings <- lapply(names(sgccda.ls),
                        function(gene_tissue.c)
                          mixOmics::plotLoadings(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                                 comp = 1,
                                                 legend.color = ProMetIS::palette.vc()[c("WT", substr(gene_tissue.c, 1, 3))],
                                                 contrib = 'max', method = 'median'))

1.3.3 Plot individuals

plot_indiv <- lapply(names(sgccda.ls),
                     function(gene_tissue.c)
                       mixOmics::plotIndiv(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                           ind.names = TRUE, 
                                           legend = TRUE,
                                           ellipse = TRUE,
                                           col.per.group = ProMetIS::palette.vc()[c("WT", substr(gene_tissue.c, 1, 3))]))

plot_arrow <- lapply(names(sgccda.ls),
                     function(gene_tissue.c)
                       mixOmics::plotArrow(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                           ind.names = FALSE, 
                                           col = ProMetIS::palette.vc()[as.character(sgccda.ls[[gene_tissue.c]][["gene.fc"]])],
                                           title = gene_tissue.c))

1.3.4 Plot variables

plot_var <- lapply(sgccda.ls,
                   function(data_model.ls)
                     mixOmics::plotVar(data_model.ls[["block_splsda"]],
                                       var.names = FALSE, 
                                       style = 'graphics', 
                                       legend = TRUE))

1.3.5 Circos

circos_plot <- lapply(names(sgccda.ls),
                      function(gene_tissue.c)
                        mixOmics::circosPlot(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                             cutoff = 0.7,
                                             line = TRUE,
                                             comp = 1,
                                             color.Y = ProMetIS::palette.vc()[c("WT",
                                                                              substr(gene_tissue.c, 1, 3))]))

1.3.6 Heatmap

cim_diablo <- lapply(sgccda.ls,
                     function(data_model.ls)
                       mixOmics::cimDiablo(data_model.ls[["block_splsda"]],
                                           comp = 1,
                                           color.Y = ProMetIS::palette.vc()[c("WT", "LAT")]))

1.3.7 Network

net_work <- lapply(sgccda.ls,
                   function(data_model.ls)
                     mixOmics::network(data_model.ls[["block_splsda"]],
                                       color.node = c("#1F78B4", '#33A02C'),
                                       shape.node = c("rectangle", "rectangle"),
                                       lty.edge = "solid", lwd.edge =  1,
                                       cutoff = 0.75))

1.3.8 Integrating the variable selection results in the dataset

for (set.c in setdiff(ProMetIS::sets.vc(), "preclinical")) {
  
  eset <- latmx2.mset[[set.c]]
  
  fdata.df <- Biobase::fData(eset)
  
  tissue.c <- unlist(strsplit(set.c, "_"))[2]
  
  for (gene.c in ProMetIS::genes.vc()) {
    
    block_splsda <- sgccda.ls[[paste0(gene.c, "_", tissue.c)]][["block_splsda"]]
    
    loadings.ls <- block_splsda[["loadings"]]
    
    loadings.vn <- loadings.ls[[set.c]][, "comp1"]
    
    loadings.vn <- loadings.vn[abs(loadings.vn) > 0]
    
    mixomics.vn <- numeric(nrow(fdata.df))
    names(mixomics.vn) <- rownames(fdata.df)
    
    stopifnot(all(names(loadings.vn) %in% names(mixomics.vn)))
    
    mixomics.vn[names(loadings.vn)] <- loadings.vn
    
    fdata.df[, paste0("mixomics_", gene.c)] <- mixomics.vn
    
  }
  
  Biobase::fData(eset) <- fdata.df
  
  stopifnot(methods::validObject(eset))
  
  latmx2.mset <- MultiDataSet::add_eset(latmx2.mset,
                                        eset,
                                        dataset.type = set.c,
                                        GRanges = NA,
                                        overwrite = TRUE,
                                        warnings = FALSE)
  
}

latmx2.mset <- latmx2.mset[, ProMetIS::sets.vc()]

2 Re-ordering metadata

#latmx2.mset <- ProMetIS::order_mset(latmx2.mset)
latmx2.mset <- ProMetIS:::metadata_select(latmx2.mset, step.c = "4_statistics_integrative")
## Supplementary metadata written in:
## ../inst/extdata/4_statistics_integrative/metadata_supp.rdata

2.0.1 Saving (not run)

phenomis::writing(latmx2.mset,
                  file.path(gsub(ProMetIS::data_dir.c(),
                                   "../../ProMetIS/inst/extdata",
                                   ProMetIS::statistics_integrative_dir.c())),
                  overwrite.l = TRUE,
                  report.c = "none")

References

Rohart, Florian, Benoit Gautier, Amrit Singh, and Kim-Anh Le Cao. 2017. “MixOmics: An R Package for Omics Feature Selection and Multiple Data Integration.” Edited by Dina Schneidman. PLOS Computational Biology 13 (11): e1005752. https://doi.org/10.1371/journal.pcbi.1005752.

Tenenhaus, A., C. Philippe, V. Guillemot, K.-A. Le Cao, J. Grill, and V. Frouin. 2014. “Variable Selection for Generalized Canonical Correlation Analysis.” Biostatistics 15 (3): 569–83. https://doi.org/10.1093/biostatistics/kxu001.

Tenenhaus, Arthur, and Michel Tenenhaus. 2011. “Regularized Generalized Canonical Correlation Analysis.” Psychometrika 76 (2): 257–84. https://doi.org/10.1007/s11336-011-9206-8.